We will make use of a simulated dataset with \(p = 25\) and \(n = 1000\). We simulate the dataset using the following chunk of code.
Pima <- rbind(MASS::Pima.tr, MASS::Pima.te)
y <- as.numeric(Pima$type == "Yes") # Binary outcome
X <- cbind(1, model.matrix(type ~ . - 1, data = Pima)) # Design matrixAs before, we will employ a relatively vague prior centered at \(0\), namely \(\beta \sim N(0, 100 I_p)\). Then, we implement the log-likelihood, the log-posterior and the gradient of the likelihood.
# Log-likelihood of a logistic regression model
loglik <- function(beta, y, X) {
eta <- c(X %*% beta)
sum(y * eta - log(1 + exp(eta)))
}
# Log-posterior
logpost <- function(beta, y, X) {
loglik(beta, y, X) + sum(dnorm(beta, 0, 10, log = T))
}
# Gradient of the logposterior
lgradient <- function(beta, y, X) {
probs <- plogis(c(X %*% beta))
loglik_gr <- c(crossprod(X, y - probs))
prior_gr <- -beta / 100
loglik_gr + prior_gr
}
# Summary table
summary_tab <- matrix(0, nrow = 6, ncol = 4)
colnames(summary_tab) <- c("Seconds", "Average ESS", "Average ESS per sec", "Average acceptance rate")
rownames(summary_tab) <- c("MH Laplace + Rcpp", "MALA", "MALA tuned", "HMC", "STAN", "Pólya-Gamma")library(coda)
R <- 30000
burn_in <- 5000set.seed(123)
# Covariance matrix is selected via laplace approximation
fit_logit <- glm(y ~ X - 1, family = binomial(link = "logit"))
p <- ncol(X)
S <- 2.38^2 * vcov(fit_logit) / p
# Running the MCMC
start.time <- Sys.time()
# MCMC
fit_MCMC <- as.mcmc(RMH_arma(R, burn_in, y, X, S)) # Convert the matrix into a "coda" object
end.time <- Sys.time()
time_in_sec <- as.numeric(difftime(end.time, start.time, units = "secs"))
# Diagnostic
summary(effectiveSize(fit_MCMC)) # Effective sample size
Min. 1st Qu. Median Mean 3rd Qu. Max.
1103 1154 1163 1166 1192 1204
summary(R / effectiveSize(fit_MCMC)) # Integrated autocorrelation time
Min. 1st Qu. Median Mean 3rd Qu. Max.
24.93 25.16 25.79 25.75 26.01 27.19
summary(1 - rejectionRate(fit_MCMC)) # Acceptance rate
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.2682 0.2682 0.2682 0.2682 0.2682 0.2682
# Summary statistics
summary_tab[1, ] <- c(
time_in_sec, mean(effectiveSize(fit_MCMC)),
mean(effectiveSize(fit_MCMC)) / time_in_sec,
1 - mean(rejectionRate(fit_MCMC))
)
# Traceplot of the intercept
plot(fit_MCMC[, 1:2])# R represent the number of samples
# burn_in is the number of discarded samples
# epsilon, S are tuning parameter
MALA <- function(R, burn_in, y, X, epsilon, S) {
p <- ncol(X)
out <- matrix(0, R, p) # Initialize an empty matrix to store the values
beta <- rep(0, p) # Initial values
A <- chol(S) # Cholesky of S
S1 <- solve(S) # Inverse of S
lgrad <- c(S %*% lgradient(beta, y, X)) # Compute the gradient
logp <- logpost(beta, y, X)
sigma2 <- epsilon^2 / p^(1 / 3)
sigma <- sqrt(sigma2)
# Starting the Gibbs sampling
for (r in 1:(burn_in + R)) {
beta_new <- beta + sigma2 / 2 * lgrad + sigma * c(crossprod(A, rnorm(p)))
logpnew <- logpost(beta_new, y, X)
lgrad_new <- c(S %*% lgradient(beta_new, y, X))
diffold <- beta - beta_new - sigma2 / 2 * lgrad_new
diffnew <- beta_new - beta - sigma2 / 2 * lgrad
qold <- -diffold %*% S1 %*% diffold / (2 * sigma2)
qnew <- -diffnew %*% S1 %*% diffnew / (2 * sigma2)
alpha <- min(1, exp(logpnew - logp + qold - qnew))
if (runif(1) < alpha) {
logp <- logpnew
lgrad <- lgrad_new
beta <- beta_new # Accept the value
}
# Store the values after the burn-in period
if (r > burn_in) {
out[r - burn_in, ] <- beta
}
}
out
}set.seed(123)
epsilon <- 0.0017 # After some trial ad error
# Running the MCMC
start.time <- Sys.time()
fit_MCMC <- as.mcmc(MALA(R = R, burn_in = burn_in, y, X, epsilon, S = diag(ncol(X)))) # Convert the matrix into a "coda" object
end.time <- Sys.time()
time_in_sec <- as.numeric(difftime(end.time, start.time, units = "secs"))
# Diagnostic
summary(effectiveSize(fit_MCMC)) # Effective sample size
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.900 9.358 27.201 44.321 46.238 166.223
summary(R / effectiveSize(fit_MCMC)) # Integrated autocorrelation time
Min. 1st Qu. Median Mean 3rd Qu. Max.
180.5 728.7 1208.5 2671.3 3283.2 10343.4
summary(1 - rejectionRate(fit_MCMC)) # Acceptance rate
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.5638 0.5638 0.5638 0.5638 0.5638 0.5638
# Summary statistics
summary_tab[2, ] <- c(
time_in_sec, mean(effectiveSize(fit_MCMC)),
mean(effectiveSize(fit_MCMC)) / time_in_sec,
1 - mean(rejectionRate(fit_MCMC))
)
# Traceplot of the intercept
plot(fit_MCMC[, 1:2])library(coda)
R <- 30000
burn_in <- 5000set.seed(123)
epsilon <- 1.68 # After some trial ad error
# Covariance matrix is selected via laplace approximation
fit_logit <- glm(y ~ X - 1, family = binomial(link = "logit"))
S <- vcov(fit_logit)
# Running the MCMC
start.time <- Sys.time()
fit_MCMC <- as.mcmc(MALA(R = R, burn_in = burn_in, y, X, epsilon, S)) # Convert the matrix into a "coda" object
end.time <- Sys.time()
time_in_sec <- as.numeric(difftime(end.time, start.time, units = "secs"))
# Diagnostic
summary(effectiveSize(fit_MCMC)) # Effective sample size
Min. 1st Qu. Median Mean 3rd Qu. Max.
8583 8762 9196 9063 9312 9409
summary(R / effectiveSize(fit_MCMC)) # Integrated autocorrelation time
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.189 3.222 3.263 3.314 3.424 3.495
summary(1 - rejectionRate(fit_MCMC)) # Acceptance rate
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.5686 0.5686 0.5686 0.5686 0.5686 0.5686
# Summary statistics
summary_tab[3, ] <- c(
time_in_sec, mean(effectiveSize(fit_MCMC)),
mean(effectiveSize(fit_MCMC)) / time_in_sec,
1 - mean(rejectionRate(fit_MCMC))
)
# Traceplot of the intercept
plot(fit_MCMC[, 1:2])HMC <- function(R, burn_in, y, X, epsilon, S, L = 10) {
p <- ncol(X)
out <- matrix(0, R, p) # Initialize an empty matrix to store the values
beta <- rep(0, p) # Initial values
logp <- logpost(beta, y, X) # Initial log-posterior
S1 <- solve(S)
A1 <- chol(S1)
# Starting the Gibbs sampling
for (r in 1:(burn_in + R)) {
P <- c(crossprod(A1, rnorm(p))) # Auxiliary variables
logK <- c(P %*% S %*% P / 2) # sum(P^2) / 2 # Kinetic energy at the beginning of the trajectory
# Make a half step for momentum at the beginning
beta_new <- beta
Pnew <- P + epsilon * lgradient(beta_new, y, X) / 2
# Alternate full steps for position and momentum
for (l in 1:L) {
# Make a full step for the position
beta_new <- beta_new + epsilon * c(S %*% Pnew)
# Make a full step for the momentum, except at end of trajectory
if (l != L) Pnew <- Pnew + epsilon * lgradient(beta_new, y, X)
}
# Make a half step for momentum at the end.
Pnew <- Pnew + epsilon * lgradient(beta_new, y, X) / 2
# Negate momentum at end of trajectory to make the proposal symmetric
Pnew <- - Pnew
# Evaluate potential and kinetic energies at the end of trajectory
logpnew <- logpost(beta_new, y, X)
logKnew <- Pnew %*% S %*% Pnew / 2 #sum(Pnew^2) / 2
# Accept or reject the state at end of trajectory, returning either
# the position at the end of the trajectory or the initial position
if (runif(1) < exp(logpnew - logp + logK - logKnew)) {
logp <- logpnew
beta <- beta_new # Accept the value
}
# Store the values after the burn-in period
if (r > burn_in) {
out[r - burn_in, ] <- beta
}
}
out
}set.seed(123)
epsilon <- 0.25 # After some trial ad error
L <- 10
# Covariance matrix is selected via laplace approximation
fit_logit <- glm(y ~ X - 1, family = binomial(link = "logit"))
S <- vcov(fit_logit)
# Running the MCMC
start.time <- Sys.time()
fit_MCMC <- as.mcmc(HMC(R = R, burn_in = burn_in, y, X, epsilon, S, L)) # Convert the matrix into a "coda" object
end.time <- Sys.time()
time_in_sec <- as.numeric(difftime(end.time, start.time, units = "secs"))
# Diagnostic
summary(effectiveSize(fit_MCMC)) # Effective sample size
Min. 1st Qu. Median Mean 3rd Qu. Max.
215765 222946 226610 225565 228360 233334
summary(R / effectiveSize(fit_MCMC)) # Integrated autocorrelation time
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1286 0.1314 0.1324 0.1331 0.1346 0.1390
summary(1 - rejectionRate(fit_MCMC)) # Acceptance rate
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.9892 0.9892 0.9892 0.9892 0.9892 0.9892
# Summary statistics
summary_tab[4, ] <- c(
time_in_sec, mean(effectiveSize(fit_MCMC)),
mean(effectiveSize(fit_MCMC)) / time_in_sec,
1 - mean(rejectionRate(fit_MCMC))
)
# Traceplot of the intercept
plot(fit_MCMC[, 1:2])library(rstan)
# I am not counting the compilation time
stan_compiled <- stan_model(file = "logistic.stan") # Stan programset.seed(1234)
# Running the MCMC
start.time <- Sys.time()
fit_HMC <- sampling(
stan_compiled, # The stan file has been previously compiled
data = list(X = X, y = y, n = nrow(X), p = ncol(X)), # named list of data
chains = 1, # number of Markov chains
warmup = burn_in, # Burn-in iterations per chain
iter = R + burn_in # Total number of iterations per chain
)
SAMPLING FOR MODEL 'logistic' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 9.6e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.96 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 35000 [ 0%] (Warmup)
Chain 1: Iteration: 3500 / 35000 [ 10%] (Warmup)
Chain 1: Iteration: 5001 / 35000 [ 14%] (Sampling)
Chain 1: Iteration: 8500 / 35000 [ 24%] (Sampling)
Chain 1: Iteration: 12000 / 35000 [ 34%] (Sampling)
Chain 1: Iteration: 15500 / 35000 [ 44%] (Sampling)
Chain 1: Iteration: 19000 / 35000 [ 54%] (Sampling)
Chain 1: Iteration: 22500 / 35000 [ 64%] (Sampling)
Chain 1: Iteration: 26000 / 35000 [ 74%] (Sampling)
Chain 1: Iteration: 29500 / 35000 [ 84%] (Sampling)
Chain 1: Iteration: 33000 / 35000 [ 94%] (Sampling)
Chain 1: Iteration: 35000 / 35000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 10.9396 seconds (Warm-up)
Chain 1: 67.1344 seconds (Sampling)
Chain 1: 78.0739 seconds (Total)
Chain 1:
end.time <- Sys.time()
time_in_sec <- as.numeric(difftime(end.time, start.time, units = "secs"))
fit_HMC <- as.mcmc(extract(fit_HMC)$beta)
# Diagnostic
summary(effectiveSize(fit_HMC)) # Effective sample size
Min. 1st Qu. Median Mean 3rd Qu. Max.
28917 30000 30000 29865 30000 30000
summary(R / effectiveSize(fit_HMC)) # Integrated autocorrelation time
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 1.000 1.000 1.005 1.000 1.037
summary(1 - rejectionRate(fit_HMC)) # Acceptance rate
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 1 1 1 1
# Summary statistics
summary_tab[5, ] <- c(
time_in_sec, mean(effectiveSize(fit_HMC)),
mean(effectiveSize(fit_HMC)) / time_in_sec,
1 - mean(rejectionRate(fit_HMC))
)
# Traceplot of the intercept
plot(fit_HMC[, 1:2])library(BayesLogit)
logit_Gibbs <- function(R, burn_in, y, X, B, b) {
p <- ncol(X)
n <- nrow(X)
out <- matrix(0, R, p) # Initialize an empty matrix to store the values
P <- solve(B) # Prior precision matrix
Pb <- P %*% b # Term appearing in the Gibbs sampling
Xy <- crossprod(X, y - 1 / 2)
# Initialization
beta <- rep(0, p)
# Iterative procedure
for (r in 1:(R + burn_in)) {
# Sampling the Pólya-gamma latent variables
eta <- c(X %*% beta)
omega <- rpg.devroye(num = n, h = 1, z = eta)
# Sampling beta
eig <- eigen(crossprod(X * sqrt(omega)) + P, symmetric = TRUE)
Sigma <- crossprod(t(eig$vectors) / sqrt(eig$values))
mu <- Sigma %*% (Xy + Pb)
A1 <- t(eig$vectors) / sqrt(eig$values)
beta <- mu + c(matrix(rnorm(1 * p), 1, p) %*% A1)
# Store the values after the burn-in period
if (r > burn_in) {
out[r - burn_in, ] <- beta
}
}
out
}B <- diag(100, ncol(X)) # Prior covariance matrix
b <- rep(0, ncol(X)) # Prior meanset.seed(123)
# Running the MCMC
start.time <- Sys.time()
fit_MCMC <- as.mcmc(logit_Gibbs(R, burn_in, y, X, B, b)) # Convert the matrix into a "coda" object
end.time <- Sys.time()
time_in_sec <- as.numeric(difftime(end.time, start.time, units = "secs"))
# Diagnostic
summary(effectiveSize(fit_MCMC)) # Effective sample size
Min. 1st Qu. Median Mean 3rd Qu. Max.
10018 13592 15411 15182 17369 18900
summary(R / effectiveSize(fit_MCMC)) # Integrated autocorrelation time
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.587 1.728 1.950 2.051 2.209 2.995
summary(1 - rejectionRate(fit_MCMC)) # Acceptance rate
Min. 1st Qu. Median Mean 3rd Qu. Max.
1 1 1 1 1 1
# Summary statistics
summary_tab[6, ] <- c(
time_in_sec, mean(effectiveSize(fit_MCMC)),
mean(effectiveSize(fit_MCMC)) / time_in_sec,
1 - mean(rejectionRate(fit_MCMC))
)
# Traceplot of the intercept
plot(fit_MCMC[, 1:2])| Seconds | Average ESS | Average ESS per sec | Average acceptance rate | |
|---|---|---|---|---|
| MH Laplace + Rcpp | 0.561887 | 1165.75640 | 2074.71670 | 0.2682089 |
| MALA | 2.743667 | 44.32131 | 16.15404 | 0.5637855 |
| MALA tuned | 2.624531 | 9063.31983 | 3453.31022 | 0.5686190 |
| HMC | 14.735173 | 225565.16881 | 15307.94169 | 0.9892330 |
| STAN | 78.846092 | 29864.58683 | 378.77067 | 1.0000000 |
| Pólya-Gamma | 10.254378 | 15181.91528 | 1480.53009 | 1.0000000 |